package getGaokaoXuankeData.controller;


import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import java.io.FileOutputStream;
import java.io.IOException;


/**
 * Scrapes the Zhejiang college subject-selection pages under
 * {@code http://zt.zjzs.net/xk2020/} and exports one spreadsheet row per
 * (school, major) pair into an {@code .xls} workbook.
 */
public class GetZjData {
    /** Root of the scraped site; the index page and the relative per-school links hang off it. */
    private static final String BASE_URL = "http://zt.zjzs.net/xk2020/";

    /**
     * Program entry point: exports the data labelled with year 2020.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if a page cannot be fetched or the workbook cannot be written
     */
    public static void main( String[] args ) throws IOException,IndexOutOfBoundsException {
        getSchool("2020");
    }

    /**
     * Downloads the school index, follows each school's detail link, and writes
     * every major found there to {@code h:\浙江<year>年数据.xls}.
     *
     * <p>Note: {@code year} is only used for the last column of each row and for
     * the output file name; the pages fetched are always the ones under
     * {@link #BASE_URL}.
     *
     * @param year label stored in each row and embedded in the output file name
     * @throws IOException if a page cannot be fetched or the workbook cannot be written
     */
    public static void getSchool(String year) throws IOException,IndexOutOfBoundsException {
        Document indexPage = Jsoup.connect(BASE_URL + "allcollege.html").get();
        Elements schoolRows = indexPage.select("table[bgcolor=#E4E4E4]").select("tr");

        HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet("选考数据");
        int rowIndex = 0; // next free row in the sheet

        // Row 0 is skipped (presumably the table header).
        for (int i = 1; i < schoolRows.size(); i++) {
            Elements schoolCells = schoolRows.get(i).select("td");
            // Only rows with exactly five cells are treated as school entries.
            if (schoolCells.size() != 5) {
                continue;
            }
            String schoolArea = schoolCells.get(0).text();
            String schoolCode = schoolCells.get(1).text();
            String schoolName = schoolCells.get(2).text();
            String schoolUrl = schoolCells.get(3).text();
            // The fifth cell links (relative to BASE_URL) to the school's list of majors.
            String selectUrl = BASE_URL + schoolCells.get(4).select("a").attr("href");

            Document majorsPage = Jsoup.connect(selectUrl).get();
            Elements majorRows = majorsPage.getElementsByTag("tbody").select("tr");
            // Diagnostic dump of the fetched rows.
            System.out.println(majorRows);

            // Row 0 is skipped here as well (presumably the table header).
            for (int j = 1; j < majorRows.size(); j++) {
                Elements majorCells = majorRows.get(j).select("td");
                // Only rows with exactly four cells are treated as major entries.
                if (majorCells.size() != 4) {
                    continue;
                }
                // Diagnostic dump of the row about to be exported.
                System.out.println("打印"+majorCells+"\n");

                String schoolLevel = majorCells.get(0).text();   // education level
                String majorName = majorCells.get(1).text();     // major (or major class) name
                String subjectScope = majorCells.get(2).text();  // allowed elective subjects
                String includedMajors = majorCells.get(3).text(); // majors contained in the class

                appendRow(sheet, rowIndex,
                        schoolArea, schoolCode, schoolName, schoolUrl,
                        schoolLevel, majorName, subjectScope, includedMajors, year);
                rowIndex++;
            }
        }

        // try-with-resources guarantees the file handle is released even if write() fails.
        try (FileOutputStream output = new FileOutputStream("h:\\浙江"+year+"年数据.xls")) {
            wb.write(output);
            output.flush();
        }
    }

    /** Creates row {@code rowIndex} in {@code sheet} and fills consecutive cells, starting at column 0, with {@code values}. */
    private static void appendRow(HSSFSheet sheet, int rowIndex, String... values) {
        HSSFRow row = sheet.createRow(rowIndex);
        for (int col = 0; col < values.length; col++) {
            row.createCell(col).setCellValue(values[col]);
        }
    }
}
